Permutation-based Mutual Exclusivity and Co-occurrence for groups of items


In [4]:
import numpy as np
import pandas as pd
import scipy.sparse as sparse
import random

This is the Python implementation of the MutEx algorithm described in the PhD thesis "Analysis and visualization of multidimensional cancer genomics data": http://www.tdx.cat/handle/10803/301436

The same algorithm is also implemented in Java in the Gitools software: http://www.gitools.org

Create a random sparse DataFrame representing mutations


In [5]:
# Small demo matrix: 100 genes (rows) x 100 samples (columns).
row, col = 100, 100
np.random.seed(77)
# Binarize the random sparse sample with ceil (the default values are drawn
# from [0, 1), so every stored entry becomes 1). `.toarray()` is the documented
# spelling of the legacy `.A` shorthand, and one vectorized np.ceil replaces
# the column-by-column `.apply(np.ceil)`.
df = pd.DataFrame(np.ceil(sparse.random(row, col, density=0.15).toarray()))

# Overwrite the first three genes with hand-made patterns: gene0 covers
# samples 0-19, gene1 covers 14-34 (small overlap with gene0) and gene2
# covers 81-99 (no overlap with either).
df.loc[0] = [1 if x < 20 else 0 for x in range(0, df.shape[1])]
df.loc[1] = [1 if x > 13 and x < 35 else 0 for x in range(0, df.shape[1])]
df.loc[2] = [1 if x > 80 else 0 for x in range(0, df.shape[1])]

# Human-readable labels: s<i> for samples, gene<i> for genes.
df.columns = ['s' + str(x) for x in df.columns]
df.index = ['gene' + str(x) for x in df.index]

pd.set_option('display.max_columns', 1000)
df.head()


Out[5]:
s0 s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 s12 s13 s14 s15 s16 s17 s18 s19 s20 s21 s22 s23 s24 s25 s26 s27 s28 s29 s30 s31 s32 s33 s34 s35 s36 s37 s38 s39 s40 s41 s42 s43 s44 s45 s46 s47 s48 s49 s50 s51 s52 s53 s54 s55 s56 s57 s58 s59 s60 s61 s62 s63 s64 s65 s66 s67 s68 s69 s70 s71 s72 s73 s74 s75 s76 s77 s78 s79 s80 s81 s82 s83 s84 s85 s86 s87 s88 s89 s90 s91 s92 s93 s94 s95 s96 s97 s98 s99
gene0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
gene1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
gene2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
gene3 0 0 1 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1
gene4 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

We import mutex and create a MutEx instance

The MutEx instance has to be created with the full data matrix, so that it has the correct background event (mutation) rate for both samples and genes.


In [6]:
from mutex import MutEx

In [7]:
# Build the model on the full gene x sample matrix (used as the background),
# configured for 2000 permutations.
m = MutEx(background=df, permutations=2000)

Example run


In [8]:
# Time one test of a three-gene group with parallel execution switched off
# (parallel=False); `%time` reports CPU and wall-clock time for the statement.
%time r = m.calculate(['gene4', 'gene5', 'gene6'], parallel=False)
print(r)


CPU times: user 34.9 ms, sys: 4.65 ms, total: 39.6 ms
Wall time: 6.04 s
MuTexResult
  Zscore:                     -0.4103513279251402
  Mutual Exclusive p-value:   0.7515
  Co-occurence p-value:       0.4245
  Permutations:               2000
  Sample Coverage:            40.0
  Signal:                     48.0

Example run, multi-threaded


In [9]:
# Same call without `parallel=False`, i.e. using calculate()'s default
# execution mode (presumably parallel, per the heading above - confirm in mutex).
# gene0/gene1/gene2 are the hand-made rows with little or no sample overlap.
%time r = m.calculate(['gene0', 'gene1', 'gene2'])
print(r)


CPU times: user 106 ms, sys: 83.7 ms, total: 190 ms
Wall time: 891 ms
MuTexResult
  Zscore:                     2.5381965020057105
  Mutual Exclusive p-value:   0.0125
  Co-occurence p-value:       1.0
  Permutations:               2000
  Sample Coverage:            54.0
  Signal:                     60.0

Example with many groups


In [10]:
#some random groups and put it in a generator (alternatively list)
random.seed(18)
group_generator = (random.sample( df.index.tolist(), random.sample([2,3,4], 1)[0]) for x in range(10) )

In [11]:
# Test every group, then tabulate one row per result object from its attributes.
result_list = [m.calculate(group) for group in group_generator]
result_df = pd.DataFrame.from_records([vars(result) for result in result_list])

# Columns to report, ordered by mutual-exclusivity p-value (smallest first).
report_columns = [
    'items',
    'coverage',
    'signal',
    'mutex_pvalue',
    'co_occurence_pvalue',
    'signal_coverage_ratio',
    'mean_sim_coverage',
]
result_df[report_columns].sort_values('mutex_pvalue')


Out[11]:
items coverage signal mutex_pvalue co_occurence_pvalue signal_coverage_ratio mean_sim_coverage
4 [gene32, gene88] 39 39 0.0160 1.0000 1.000000 35.2355
1 [gene42, gene30, gene25] 48 54 0.1175 0.9320 0.888889 44.7300
6 [gene97, gene22, gene30, gene21] 55 68 0.1640 0.9055 0.808824 51.8105
8 [gene73, gene64, gene25] 46 53 0.2595 0.8715 0.867925 44.1585
7 [gene25, gene94] 34 36 0.3595 0.8325 0.944444 32.8900
2 [gene80, gene63, gene23] 42 48 0.4710 0.6740 0.875000 41.3335
3 [gene37, gene58, gene33] 39 47 0.7530 0.4680 0.829787 39.7870
0 [gene15, gene84] 28 31 0.8425 0.3445 0.903226 28.8570
9 [gene96, gene63, gene27, gene91] 43 56 0.8555 0.2500 0.767857 44.9080
5 [gene41, gene66] 29 36 0.9880 0.0120 0.805556 32.6725

Big matrix


In [12]:
# Larger benchmark matrix: 10,000 genes (rows) x 800 samples (columns), using
# sparse.random's default density. NOTE: this rebinds `df`, so every cell
# below works on the big matrix rather than the 100x100 demo matrix.
row, col = 10000, 800
np.random.seed(77)
# `.toarray()` is the documented spelling of the legacy `.A` shorthand, and a
# single vectorized np.ceil avoids 800 separate column-wise apply calls.
df = pd.DataFrame(np.ceil(sparse.random(row, col).toarray()))

# Human-readable labels: s<i> for samples, gene<i> for genes.
df.columns = ['s' + str(x) for x in df.columns]
df.index = ['gene' + str(x) for x in df.index]

pd.set_option('display.max_columns', 1000)
df.head()


Out[12]:
s0 s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 s12 s13 s14 s15 s16 s17 s18 s19 s20 s21 s22 s23 s24 s25 s26 s27 s28 s29 s30 s31 s32 s33 s34 s35 s36 s37 s38 s39 s40 s41 s42 s43 s44 s45 s46 s47 s48 s49 s50 s51 s52 s53 s54 s55 s56 s57 s58 s59 s60 s61 s62 s63 s64 s65 s66 s67 s68 s69 s70 s71 s72 s73 s74 s75 s76 s77 s78 s79 s80 s81 s82 s83 s84 s85 s86 s87 s88 s89 s90 s91 s92 s93 s94 s95 s96 s97 s98 s99 s100 s101 s102 s103 s104 s105 s106 s107 s108 s109 s110 s111 s112 s113 s114 s115 s116 s117 s118 s119 s120 s121 s122 s123 s124 s125 s126 s127 s128 s129 s130 s131 s132 s133 s134 s135 s136 s137 s138 s139 s140 s141 s142 s143 s144 s145 s146 s147 s148 s149 s150 s151 s152 s153 s154 s155 s156 s157 s158 s159 s160 s161 s162 s163 s164 s165 s166 s167 s168 s169 s170 s171 s172 s173 s174 s175 s176 s177 s178 s179 s180 s181 s182 s183 s184 s185 s186 s187 s188 s189 s190 s191 s192 s193 s194 s195 s196 s197 s198 s199 s200 s201 s202 s203 s204 s205 s206 s207 s208 s209 s210 s211 s212 s213 s214 s215 s216 s217 s218 s219 s220 s221 s222 s223 s224 s225 s226 s227 s228 s229 s230 s231 s232 s233 s234 s235 s236 s237 s238 s239 s240 s241 s242 s243 s244 s245 s246 s247 s248 s249 s250 s251 s252 s253 s254 s255 s256 s257 s258 s259 s260 s261 s262 s263 s264 s265 s266 s267 s268 s269 s270 s271 s272 s273 s274 s275 s276 s277 s278 s279 s280 s281 s282 s283 s284 s285 s286 s287 s288 s289 s290 s291 s292 s293 s294 s295 s296 s297 s298 s299 s300 s301 s302 s303 s304 s305 s306 s307 s308 s309 s310 s311 s312 s313 s314 s315 s316 s317 s318 s319 s320 s321 s322 s323 s324 s325 s326 s327 s328 s329 s330 s331 s332 s333 s334 s335 s336 s337 s338 s339 s340 s341 s342 s343 s344 s345 s346 s347 s348 s349 s350 s351 s352 s353 s354 s355 s356 s357 s358 s359 s360 s361 s362 s363 s364 s365 s366 s367 s368 s369 s370 s371 s372 s373 s374 s375 s376 s377 s378 s379 s380 s381 s382 s383 s384 s385 s386 s387 s388 s389 s390 s391 s392 s393 s394 s395 s396 s397 s398 s399 s400 s401 s402 s403 s404 s405 s406 s407 s408 s409 s410 s411 s412 s413 s414 s415 s416 s417 s418 s419 s420 s421 
s422 s423 s424 s425 s426 s427 s428 s429 s430 s431 s432 s433 s434 s435 s436 s437 s438 s439 s440 s441 s442 s443 s444 s445 s446 s447 s448 s449 s450 s451 s452 s453 s454 s455 s456 s457 s458 s459 s460 s461 s462 s463 s464 s465 s466 s467 s468 s469 s470 s471 s472 s473 s474 s475 s476 s477 s478 s479 s480 s481 s482 s483 s484 s485 s486 s487 s488 s489 s490 s491 s492 s493 s494 s495 s496 s497 s498 s499 s500 s501 s502 s503 s504 s505 s506 s507 s508 s509 s510 s511 s512 s513 s514 s515 s516 s517 s518 s519 s520 s521 s522 s523 s524 s525 s526 s527 s528 s529 s530 s531 s532 s533 s534 s535 s536 s537 s538 s539 s540 s541 s542 s543 s544 s545 s546 s547 s548 s549 s550 s551 s552 s553 s554 s555 s556 s557 s558 s559 s560 s561 s562 s563 s564 s565 s566 s567 s568 s569 s570 s571 s572 s573 s574 s575 s576 s577 s578 s579 s580 s581 s582 s583 s584 s585 s586 s587 s588 s589 s590 s591 s592 s593 s594 s595 s596 s597 s598 s599 s600 s601 s602 s603 s604 s605 s606 s607 s608 s609 s610 s611 s612 s613 s614 s615 s616 s617 s618 s619 s620 s621 s622 s623 s624 s625 s626 s627 s628 s629 s630 s631 s632 s633 s634 s635 s636 s637 s638 s639 s640 s641 s642 s643 s644 s645 s646 s647 s648 s649 s650 s651 s652 s653 s654 s655 s656 s657 s658 s659 s660 s661 s662 s663 s664 s665 s666 s667 s668 s669 s670 s671 s672 s673 s674 s675 s676 s677 s678 s679 s680 s681 s682 s683 s684 s685 s686 s687 s688 s689 s690 s691 s692 s693 s694 s695 s696 s697 s698 s699 s700 s701 s702 s703 s704 s705 s706 s707 s708 s709 s710 s711 s712 s713 s714 s715 s716 s717 s718 s719 s720 s721 s722 s723 s724 s725 s726 s727 s728 s729 s730 s731 s732 s733 s734 s735 s736 s737 s738 s739 s740 s741 s742 s743 s744 s745 s746 s747 s748 s749 s750 s751 s752 s753 s754 s755 s756 s757 s758 s759 s760 s761 s762 s763 s764 s765 s766 s767 s768 s769 s770 s771 s772 s773 s774 s775 s776 s777 s778 s779 s780 s781 s782 s783 s784 s785 s786 s787 s788 s789 s790 s791 s792 s793 s794 s795 s796 s797 s798 s799
gene0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
gene1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
gene2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
gene3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
gene4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0

In [13]:
# Rebuild the model on the big matrix (replaces the earlier `m`), now
# configured for 10000 permutations.
m = MutEx(df, permutations=10000)
# Pick 20 genes at random.
# NOTE(review): `random` is not re-seeded in this cell, so the chosen group
# depends on how much of the random stream earlier cells consumed.
group = random.sample( df.index.tolist(), 20)
%time r = m.calculate(group)
r


CPU times: user 4.7 s, sys: 4.28 s, total: 8.98 s
Wall time: 12.7 s
Out[13]:
MuTexResult
  Zscore:                     1.5955557182649056
  Mutual Exclusive p-value:   0.0722
  Co-occurence p-value:       0.961
  Permutations:               10000
  Sample Coverage:            132.0
  Signal:                     138.0

Comparing simulation with 'original' data

We compute the mutational burden per sample and do 500 simulations of the mutation matrix


In [14]:
# Observed mutational burden per sample (column totals). DataFrame.sum() is
# vectorized; df.apply(sum) hands each 10,000-row column to Python's builtin
# sum, which gives the same totals far more slowly.
sample_mut_orig = df.sum()

In [79]:
# Peek at the observed totals for the first few samples.
sample_mut_orig.head()


Out[79]:
s0     98
s1    104
s2     86
s3     98
s4    118
dtype: float64

In [17]:
from pandas.tools.plotting import parallel_coordinates
import matplotlib.pylab as pylab
%matplotlib inline

The 500 simulations of the mutation matrix - parallel execution


In [73]:
from functools import partial
import multiprocessing as mp
pool = mp.Pool(processes=mp.cpu_count())
def dosim(m,x):
    np.random.seed(x)
    return m._simulate_observations(df.apply(sum, axis=1)).apply(sum)
partial_simul = partial(dosim)
n = 500
%time simulated_results = pool.starmap(partial_simul.func, zip(itertools.repeat(m,n), range(n)) )
pool.close()  # we are not adding any more processes
pool.join()  # tell it to wait until all threads are done before going on


CPU times: user 5.58 s, sys: 6.73 s, total: 12.3 s
Wall time: 3min 5s

Calculate the mean and standard deviation of the simulated mutational burden for each sample


In [74]:
# Put the per-simulation result Series side by side, then flip the table so
# that every row is one simulation and every column one sample.
sim_sample_muts = pd.concat(simulated_results, axis=1).T
sim_sample_muts.columns = df.columns
# Per-sample centre and spread across all simulations.
sim_mean = sim_sample_muts.apply(np.mean)
sim_std = sim_sample_muts.apply(np.std)
sim_sample_muts.head()


Out[74]:
s0 s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 s12 s13 s14 s15 s16 s17 s18 s19 s20 s21 s22 s23 s24 s25 s26 s27 s28 s29 s30 s31 s32 s33 s34 s35 s36 s37 s38 s39 s40 s41 s42 s43 s44 s45 s46 s47 s48 s49 s50 s51 s52 s53 s54 s55 s56 s57 s58 s59 s60 s61 s62 s63 s64 s65 s66 s67 s68 s69 s70 s71 s72 s73 s74 s75 s76 s77 s78 s79 s80 s81 s82 s83 s84 s85 s86 s87 s88 s89 s90 s91 s92 s93 s94 s95 s96 s97 s98 s99 s100 s101 s102 s103 s104 s105 s106 s107 s108 s109 s110 s111 s112 s113 s114 s115 s116 s117 s118 s119 s120 s121 s122 s123 s124 s125 s126 s127 s128 s129 s130 s131 s132 s133 s134 s135 s136 s137 s138 s139 s140 s141 s142 s143 s144 s145 s146 s147 s148 s149 s150 s151 s152 s153 s154 s155 s156 s157 s158 s159 s160 s161 s162 s163 s164 s165 s166 s167 s168 s169 s170 s171 s172 s173 s174 s175 s176 s177 s178 s179 s180 s181 s182 s183 s184 s185 s186 s187 s188 s189 s190 s191 s192 s193 s194 s195 s196 s197 s198 s199 s200 s201 s202 s203 s204 s205 s206 s207 s208 s209 s210 s211 s212 s213 s214 s215 s216 s217 s218 s219 s220 s221 s222 s223 s224 s225 s226 s227 s228 s229 s230 s231 s232 s233 s234 s235 s236 s237 s238 s239 s240 s241 s242 s243 s244 s245 s246 s247 s248 s249 s250 s251 s252 s253 s254 s255 s256 s257 s258 s259 s260 s261 s262 s263 s264 s265 s266 s267 s268 s269 s270 s271 s272 s273 s274 s275 s276 s277 s278 s279 s280 s281 s282 s283 s284 s285 s286 s287 s288 s289 s290 s291 s292 s293 s294 s295 s296 s297 s298 s299 s300 s301 s302 s303 s304 s305 s306 s307 s308 s309 s310 s311 s312 s313 s314 s315 s316 s317 s318 s319 s320 s321 s322 s323 s324 s325 s326 s327 s328 s329 s330 s331 s332 s333 s334 s335 s336 s337 s338 s339 s340 s341 s342 s343 s344 s345 s346 s347 s348 s349 s350 s351 s352 s353 s354 s355 s356 s357 s358 s359 s360 s361 s362 s363 s364 s365 s366 s367 s368 s369 s370 s371 s372 s373 s374 s375 s376 s377 s378 s379 s380 s381 s382 s383 s384 s385 s386 s387 s388 s389 s390 s391 s392 s393 s394 s395 s396 s397 s398 s399 s400 s401 s402 s403 s404 s405 s406 s407 s408 s409 s410 s411 s412 s413 s414 s415 s416 s417 s418 s419 s420 s421 
s422 s423 s424 s425 s426 s427 s428 s429 s430 s431 s432 s433 s434 s435 s436 s437 s438 s439 s440 s441 s442 s443 s444 s445 s446 s447 s448 s449 s450 s451 s452 s453 s454 s455 s456 s457 s458 s459 s460 s461 s462 s463 s464 s465 s466 s467 s468 s469 s470 s471 s472 s473 s474 s475 s476 s477 s478 s479 s480 s481 s482 s483 s484 s485 s486 s487 s488 s489 s490 s491 s492 s493 s494 s495 s496 s497 s498 s499 s500 s501 s502 s503 s504 s505 s506 s507 s508 s509 s510 s511 s512 s513 s514 s515 s516 s517 s518 s519 s520 s521 s522 s523 s524 s525 s526 s527 s528 s529 s530 s531 s532 s533 s534 s535 s536 s537 s538 s539 s540 s541 s542 s543 s544 s545 s546 s547 s548 s549 s550 s551 s552 s553 s554 s555 s556 s557 s558 s559 s560 s561 s562 s563 s564 s565 s566 s567 s568 s569 s570 s571 s572 s573 s574 s575 s576 s577 s578 s579 s580 s581 s582 s583 s584 s585 s586 s587 s588 s589 s590 s591 s592 s593 s594 s595 s596 s597 s598 s599 s600 s601 s602 s603 s604 s605 s606 s607 s608 s609 s610 s611 s612 s613 s614 s615 s616 s617 s618 s619 s620 s621 s622 s623 s624 s625 s626 s627 s628 s629 s630 s631 s632 s633 s634 s635 s636 s637 s638 s639 s640 s641 s642 s643 s644 s645 s646 s647 s648 s649 s650 s651 s652 s653 s654 s655 s656 s657 s658 s659 s660 s661 s662 s663 s664 s665 s666 s667 s668 s669 s670 s671 s672 s673 s674 s675 s676 s677 s678 s679 s680 s681 s682 s683 s684 s685 s686 s687 s688 s689 s690 s691 s692 s693 s694 s695 s696 s697 s698 s699 s700 s701 s702 s703 s704 s705 s706 s707 s708 s709 s710 s711 s712 s713 s714 s715 s716 s717 s718 s719 s720 s721 s722 s723 s724 s725 s726 s727 s728 s729 s730 s731 s732 s733 s734 s735 s736 s737 s738 s739 s740 s741 s742 s743 s744 s745 s746 s747 s748 s749 s750 s751 s752 s753 s754 s755 s756 s757 s758 s759 s760 s761 s762 s763 s764 s765 s766 s767 s768 s769 s770 s771 s772 s773 s774 s775 s776 s777 s778 s779 s780 s781 s782 s783 s784 s785 s786 s787 s788 s789 s790 s791 s792 s793 s794 s795 s796 s797 s798 s799
0 100 133 74 79 123 97 110 94 78 88 93 102 90 95 99 96 80 99 85 129 134 103 118 115 108 99 108 93 135 122 78 80 105 119 122 102 102 98 89 102 94 104 111 103 102 119 122 104 94 102 85 101 102 97 95 104 82 111 102 119 119 109 112 96 110 97 103 99 129 126 104 88 96 92 123 107 124 112 93 83 107 125 90 93 117 112 103 134 97 119 108 116 84 113 97 86 119 93 120 98 97 104 105 126 129 105 116 95 94 88 102 93 84 98 96 110 80 100 113 101 110 113 90 88 85 95 87 113 82 91 78 120 116 83 100 103 101 110 97 95 103 94 93 94 107 105 114 104 93 111 67 101 128 94 77 93 84 109 141 131 108 86 107 128 79 111 95 94 71 100 97 107 98 81 107 91 81 123 73 97 97 113 114 108 114 95 80 90 76 107 90 116 85 100 85 115 96 117 109 78 90 127 101 121 127 101 95 107 97 81 118 73 109 113 101 93 96 106 102 107 108 82 94 116 108 97 109 99 104 80 100 90 96 92 101 106 91 114 86 106 96 109 79 112 90 107 99 98 98 92 104 101 107 137 107 89 112 82 96 99 84 108 94 99 123 72 93 103 121 98 86 89 110 92 82 99 87 90 67 104 115 106 88 122 78 117 106 87 90 105 107 82 92 104 85 71 79 95 89 100 86 81 91 100 99 89 100 100 98 97 78 114 86 100 99 85 101 112 100 82 128 83 135 92 109 78 102 92 76 104 110 91 116 95 86 111 106 114 79 104 99 108 90 78 116 112 77 102 133 108 80 99 89 94 89 101 78 123 117 98 92 102 96 82 121 92 118 112 89 101 90 102 106 104 86 95 104 88 94 106 85 143 95 102 89 84 119 68 110 105 109 87 89 87 100 98 84 117 92 114 104 100 105 92 102 97 119 82 93 96 75 111 113 94 79 91 90 130 145 83 68 102 76 114 107 103 89 94 98 103 107 89 107 99 106 91 113 121 101 118 114 103 109 106 101 102 93 106 94 90 85 99 78 101 101 94 131 120 97 81 108 93 131 84 114 102 101 119 75 89 101 93 114 98 114 102 85 112 105 93 108 96 117 90 128 110 72 116 103 85 96 100 97 90 97 110 97 107 86 98 76 87 115 108 87 83 100 104 97 91 108 112 114 77 99 102 87 106 107 76 86 110 78 107 97 95 114 77 69 113 90 93 74 100 109 109 89 104 92 105 110 106 78 89 117 101 103 106 104 84 82 107 103 96 101 99 110 90 120 121 100 99 86 96 101 81 104 85 112 
94 80 72 95 87 83 70 116 102 101 85 86 103 92 90 127 121 88 110 79 102 71 112 81 92 102 92 108 104 104 123 121 116 109 89 94 116 67 98 115 87 103 106 75 103 110 97 70 85 109 113 94 103 102 87 116 102 86 96 106 98 84 110 94 97 77 84 82 101 101 102 142 100 93 133 98 81 102 97 110 95 106 104 80 106 119 104 91 100 103 85 147 104 107 84 102 126 102 102 124 92 77 106 92 136 93 92 93 121 88 123 87 88 97 111 89 118 100 105 101 103 118 104 92 79 88 121 109 72 90 93 82 88 98 90 99 105 91 144 96 102 97 117 91 98 83 77 86 88 99 110 135 92 98 105 92 116 102 97 106 104 84 107 76 126 95 91 120 102 99 110 105 101 115 101 89 109 115 126 117 89 103 112 128 78 122 100 95 85 119 82 78 85 89 103 131 100 85 113 112 112 99 95 115 112 104 94 82 99 90 119 88 124 102 101 112 141 119 99 84 105 86 103 93 118 79 83 97 103 89 109
1 110 110 90 89 129 94 105 90 114 85 100 112 87 81 94 103 103 97 84 121 135 119 120 118 126 97 116 94 102 137 117 91 96 111 121 104 109 100 104 83 107 112 94 107 113 110 120 105 82 86 88 111 101 89 131 121 96 97 84 103 123 106 95 88 133 118 86 104 107 108 106 111 86 82 123 98 103 90 85 95 111 101 107 85 95 104 97 99 118 119 101 118 75 116 92 90 98 113 104 82 96 95 94 87 136 115 110 109 93 58 107 101 84 109 113 96 83 99 102 113 99 117 107 90 98 89 91 94 57 106 84 118 93 96 93 99 103 115 97 84 103 93 97 86 91 114 119 102 90 124 67 110 110 81 105 94 85 114 135 116 120 82 104 119 88 125 101 91 90 110 124 101 105 87 111 87 92 97 81 114 95 110 91 118 93 101 105 102 87 90 101 104 89 105 98 92 91 89 113 89 107 110 109 111 116 101 113 120 93 91 122 79 82 152 95 92 91 100 117 116 100 83 67 106 85 104 116 84 109 105 95 74 89 119 94 119 108 111 85 99 90 105 96 76 93 104 86 106 89 110 123 91 95 116 106 84 93 126 92 102 90 79 85 105 94 82 93 112 102 107 79 108 108 105 100 77 74 95 82 98 102 109 88 120 82 112 100 88 112 96 111 79 95 88 86 98 103 100 121 99 88 88 86 87 106 103 100 114 102 94 103 109 81 105 87 88 102 103 98 104 74 103 132 88 103 82 102 102 87 105 89 90 95 92 87 116 106 95 74 95 111 97 94 85 101 88 63 93 106 113 96 115 109 85 100 130 100 127 107 101 99 77 86 93 105 99 83 111 85 107 109 95 95 102 70 97 90 105 95 84 97 111 93 103 81 108 105 93 96 109 86 87 97 87 86 83 106 96 83 105 95 96 95 91 90 91 103 100 104 91 83 115 100 89 88 88 89 132 135 73 81 100 101 115 91 109 77 87 113 118 104 102 105 101 111 104 119 107 93 101 106 102 104 110 126 118 90 92 107 92 85 101 88 66 83 98 99 101 109 109 108 100 124 109 119 111 114 111 93 119 113 90 116 84 90 105 110 112 89 102 99 99 121 106 133 99 65 105 110 93 102 88 101 118 108 117 100 87 112 87 85 72 115 93 119 79 110 94 87 83 109 98 98 107 98 94 103 117 107 78 87 118 80 110 125 92 92 96 61 109 125 121 88 92 93 109 107 99 81 122 126 98 91 97 117 88 90 110 119 85 78 103 85 92 131 96 89 93 112 104 95 114 89 85 102 91 123 82 110 
97 92 79 96 80 74 72 92 90 95 90 76 101 98 115 95 102 96 101 96 90 106 102 82 80 103 105 114 122 99 120 77 110 106 96 89 122 77 88 119 85 81 102 90 130 107 113 90 96 96 86 86 107 77 94 102 93 91 104 102 116 86 126 88 80 107 95 101 116 93 120 119 86 105 104 108 99 77 112 117 119 113 109 80 118 109 110 109 103 99 81 92 104 98 83 98 96 101 93 92 97 100 83 86 112 81 94 98 110 111 107 118 109 87 100 79 106 103 85 100 87 102 113 105 94 107 88 96 93 89 95 91 127 133 109 91 120 107 118 115 92 83 92 92 114 95 88 90 112 116 103 104 87 105 120 91 123 104 79 95 91 95 111 66 112 105 90 127 97 98 114 100 98 88 100 77 109 121 147 124 88 125 100 128 102 86 103 107 87 110 98 105 96 91 106 125 104 89 122 100 125 110 90 99 97 101 93 74 86 89 107 110 114 78 123 159 127 105 114 82 106 92 103 96 89 93 73 89 101 93 95
2 90 112 72 105 112 104 122 87 89 97 112 100 74 96 90 108 93 107 88 112 119 101 144 105 104 89 126 89 121 107 113 80 126 110 100 114 115 94 98 92 95 100 93 99 95 131 109 110 101 83 96 104 98 85 117 96 69 92 111 114 103 106 107 102 119 109 110 105 115 121 102 107 71 95 122 111 105 104 87 93 110 118 106 91 105 111 101 97 92 116 106 99 72 115 109 112 116 102 106 92 96 96 68 105 117 78 87 117 109 90 109 75 71 104 114 146 95 100 96 95 93 118 97 117 119 102 88 87 95 79 83 139 103 91 94 96 104 126 96 101 120 94 99 97 99 105 135 109 109 96 89 113 105 89 76 85 86 97 115 117 109 99 133 129 108 100 106 97 89 102 116 105 108 84 100 101 101 112 87 99 82 105 93 96 94 89 89 96 80 108 92 73 82 100 79 101 90 103 106 61 113 133 131 107 123 122 106 101 86 100 122 81 97 131 112 101 97 106 126 110 121 96 88 101 97 95 106 90 93 86 91 98 93 98 91 116 105 108 91 89 82 91 82 99 100 75 109 103 120 104 133 97 93 120 102 87 117 87 64 91 93 88 72 105 127 78 76 80 102 106 96 99 110 73 90 96 87 126 79 108 87 88 74 125 97 141 101 92 107 110 118 101 100 93 70 98 87 106 108 106 99 76 107 98 101 101 98 119 105 116 73 104 104 91 79 90 106 90 117 92 102 91 126 99 93 86 91 79 69 101 107 101 109 99 78 120 101 108 71 96 90 101 99 90 105 97 67 116 120 116 98 97 117 99 75 102 101 112 113 102 100 89 98 86 105 94 87 104 102 104 97 97 103 96 93 109 99 106 111 93 88 112 104 89 86 110 95 94 84 98 120 90 101 86 102 84 96 101 78 92 106 110 116 93 104 113 110 88 89 97 86 125 114 85 69 91 92 131 128 99 98 106 90 104 91 106 118 95 91 112 102 87 89 91 112 112 124 103 106 110 113 88 117 80 105 98 84 105 93 85 77 104 69 73 78 93 114 89 95 112 103 95 120 105 114 97 105 116 83 95 105 85 98 87 100 83 96 105 111 93 120 97 106 95 123 105 79 93 105 95 100 114 100 109 98 98 103 101 102 95 74 89 103 83 98 82 77 110 91 78 121 102 112 120 83 120 111 102 88 77 69 117 70 120 91 118 95 94 83 91 134 95 91 110 85 112 106 91 83 102 130 103 71 96 101 96 96 117 104 102 84 103 97 97 103 105 95 91 124 135 102 122 89 107 106 82 104 88 107 
79 111 87 81 108 83 77 94 87 80 93 85 99 102 107 113 113 82 95 85 99 97 97 94 95 111 73 110 101 101 124 89 113 109 97 68 85 87 106 97 96 102 114 78 91 100 97 97 68 101 85 118 105 80 96 114 104 114 115 108 128 82 109 94 89 105 87 93 84 75 98 111 89 102 116 99 96 90 89 100 105 121 124 89 104 107 93 99 109 98 89 107 99 130 98 83 109 97 108 101 103 94 101 72 109 111 89 92 121 96 102 93 93 102 107 88 109 96 95 106 113 103 114 109 84 96 102 102 91 91 96 98 95 124 85 105 109 109 103 114 119 98 85 98 98 94 99 100 80 96 82 109 94 107 118 99 77 103 81 86 107 92 112 98 108 90 88 129 71 78 129 127 101 99 122 106 108 107 147 122 71 98 122 119 85 98 99 122 83 116 84 96 99 78 114 144 117 103 116 109 126 92 98 111 108 89 79 97 113 90 94 102 109 106 115 113 151 104 103 94 105 105 107 100 110 80 76 104 106 94 96
3 103 103 79 81 104 76 112 103 71 67 102 91 96 90 102 105 112 106 85 99 139 134 114 109 100 94 103 95 114 134 117 73 106 103 97 97 101 99 104 78 100 106 84 96 103 119 123 104 95 90 88 104 104 102 85 102 92 109 89 113 110 110 115 90 119 91 88 95 100 111 86 121 101 103 128 109 95 110 116 90 95 99 106 90 96 103 100 107 89 108 88 118 82 119 103 107 97 106 114 112 99 126 97 99 129 89 90 96 96 71 92 85 81 87 102 121 71 91 89 116 97 114 111 103 91 96 73 80 87 83 85 116 94 98 108 121 109 97 87 97 106 93 80 102 98 120 123 109 102 100 83 100 120 83 84 92 81 110 106 128 103 91 115 117 95 120 87 102 106 97 115 100 90 78 106 102 86 118 94 112 72 96 113 116 115 89 87 97 85 104 83 101 60 113 92 94 104 112 112 86 122 140 128 119 132 92 90 102 87 98 101 77 89 137 103 94 94 95 123 103 112 91 83 131 105 87 113 107 101 81 93 69 88 108 105 115 103 112 82 119 99 81 101 95 96 107 89 94 103 114 100 90 85 130 115 85 112 89 89 114 92 107 73 102 124 85 87 107 96 104 85 122 108 73 93 82 96 112 77 107 124 107 83 117 99 112 93 81 112 105 113 92 97 100 92 107 89 86 97 102 109 84 82 103 101 105 89 92 102 83 91 127 104 102 101 83 97 100 103 101 109 94 96 97 96 97 81 96 99 108 94 97 101 98 94 120 85 100 91 111 105 127 107 85 104 105 61 86 107 130 101 102 112 96 100 96 110 79 115 84 101 99 111 91 118 78 94 96 88 118 100 103 94 84 82 105 87 78 93 98 98 111 97 90 93 108 94 91 110 108 103 81 91 92 88 73 104 101 76 115 114 87 95 98 97 89 126 104 71 96 85 116 117 88 89 100 101 124 126 81 74 72 107 113 92 95 86 91 99 94 107 99 105 124 97 100 127 108 102 95 109 91 135 95 109 100 89 98 127 94 83 100 92 76 81 82 107 100 86 100 84 112 114 116 112 97 118 115 86 81 92 88 101 92 96 79 102 121 93 81 89 82 105 95 135 111 88 116 105 91 83 97 89 105 96 120 111 96 125 93 75 90 99 102 104 72 95 91 84 73 118 94 110 99 114 104 109 97 100 92 79 121 75 99 129 110 124 96 72 86 116 97 91 86 89 99 100 81 110 98 119 126 85 94 122 120 115 116 97 110 93 127 96 89 97 89 85 88 113 81 99 121 92 99 109 84 112 86 109 90 100 68 107 
90 90 100 100 87 89 89 79 100 98 104 118 111 91 117 97 87 102 93 90 96 94 80 108 100 110 106 104 102 85 108 93 126 81 110 116 70 105 106 78 117 118 105 91 77 101 107 100 118 112 105 99 114 89 96 106 108 87 109 97 95 80 94 91 79 89 99 110 102 112 129 102 94 74 96 115 130 107 104 91 116 114 115 99 89 99 97 92 93 124 93 78 103 74 97 114 116 87 109 91 109 84 96 90 102 99 111 99 112 83 106 93 124 84 99 108 93 98 114 91 66 95 104 96 87 105 91 73 101 110 106 93 98 101 121 123 116 96 100 101 112 95 77 94 97 111 93 137 109 114 108 100 128 104 86 89 83 102 92 72 119 119 97 121 111 91 103 113 105 91 117 90 131 103 141 112 87 108 93 111 101 94 87 90 89 108 74 121 106 93 92 130 106 107 120 104 133 110 105 126 115 99 85 83 123 112 116 122 103 127 103 112 137 101 93 78 109 108 114 97 88 85 76 102 122 83 105
4 97 104 96 78 125 93 105 101 100 70 91 90 74 88 98 105 98 112 92 112 115 103 125 137 122 89 100 101 129 108 111 86 125 114 136 101 111 119 101 93 80 94 100 117 89 83 116 89 92 90 106 105 91 80 108 112 93 93 94 126 107 103 116 90 138 111 98 111 139 120 96 110 104 104 123 114 110 114 112 97 111 100 113 76 99 110 97 133 81 123 109 106 97 98 108 101 113 86 98 110 83 106 100 114 134 93 110 99 81 72 103 90 93 94 122 113 83 103 91 83 88 90 100 96 90 96 78 90 97 91 95 119 105 84 92 99 89 113 101 108 119 77 94 123 95 100 127 122 80 113 90 105 105 94 89 100 77 114 121 114 114 94 121 101 114 106 110 91 91 112 107 98 88 84 112 84 97 101 80 120 92 84 87 101 91 109 97 64 84 104 90 100 89 104 86 93 92 104 107 71 99 140 97 114 125 108 97 108 88 95 114 76 82 116 111 98 108 98 115 108 74 99 74 112 125 80 114 89 78 82 83 85 83 102 109 102 107 91 103 110 105 89 101 77 99 88 68 94 101 98 104 100 111 126 90 102 97 91 75 108 102 134 79 107 105 88 101 113 92 117 87 99 102 86 102 102 89 106 78 109 97 110 87 101 98 118 100 89 131 105 88 88 95 102 99 91 98 92 99 92 87 70 85 106 105 98 103 105 105 105 92 104 75 86 111 107 125 129 103 84 101 108 116 93 91 107 82 99 85 95 89 93 100 116 81 120 93 119 75 93 106 92 109 79 108 98 66 117 123 121 107 105 95 87 82 94 97 102 121 104 82 82 102 94 127 101 76 119 100 87 81 100 93 88 104 113 93 103 92 87 100 98 101 90 99 90 95 86 100 100 84 73 106 108 99 80 96 129 82 109 109 86 104 96 88 108 118 88 79 109 78 125 112 84 79 81 100 116 127 77 81 94 94 111 95 107 98 94 105 102 102 83 98 100 96 101 128 107 102 88 114 93 94 80 90 110 97 108 119 87 86 112 97 100 97 105 94 98 95 100 114 87 135 94 116 86 109 101 96 93 97 89 121 88 86 75 95 104 85 96 106 98 147 89 120 121 67 100 99 102 92 89 84 109 89 91 115 98 86 81 84 95 110 109 113 74 118 110 76 100 115 109 133 88 88 100 86 99 107 90 99 95 89 97 96 101 93 95 67 99 121 102 84 98 100 114 77 90 96 121 114 101 74 91 109 105 93 124 105 92 102 101 120 90 90 91 112 92 110 101 112 100 93 104 95 92 91 75 89 73 91 84 119 
93 92 77 83 104 87 101 75 94 116 101 113 114 60 106 83 90 101 123 98 94 97 56 95 113 97 120 103 115 94 97 76 111 72 89 111 95 103 136 66 101 106 114 106 88 92 95 104 107 109 82 103 94 95 122 100 101 94 111 99 79 97 85 76 105 110 96 114 97 72 124 86 83 88 103 110 131 118 109 82 99 100 96 101 96 94 109 123 103 109 89 91 95 95 108 124 91 98 89 88 103 92 104 89 124 105 104 96 101 100 112 74 124 99 77 86 113 106 101 107 80 90 116 101 108 88 87 87 121 116 108 99 91 109 133 100 127 88 111 69 113 115 86 94 109 79 107 130 114 125 126 91 93 108 95 98 94 111 109 75 125 88 104 130 102 101 108 106 98 117 116 76 112 111 148 111 69 101 107 121 111 93 115 114 91 112 93 102 71 95 102 125 134 98 100 122 116 101 93 106 116 98 100 84 117 114 98 88 122 113 111 129 127 96 107 74 107 100 125 103 95 94 76 101 141 100 91

We plot the simulated mean with the standard deviation as well as the 'original' mutational burden

We can see that, across the 500 simulated sets, the simulated mean closely matches the original mutational burden of each sample


In [78]:
# One column per statistic (0: mean, 1: mean + std, 2: mean - std,
# 3: original), samples ordered by their simulated mean, then flipped so that
# every statistic becomes one row, i.e. one line in the plot.
sample_muts_stats = (
    pd.concat(
        [sim_mean, sim_mean + sim_std, sim_mean - sim_std, sample_mut_orig],
        axis=1,
    )
    .sort_values(0)
    .transpose()
)
# Class column that parallel_coordinates uses to label and colour the lines.
sample_muts_stats['simulated'] = ['mean', 'sim_mean + std', 'sim_mean - std', 'original']
pylab.rcParams['figure.figsize'] = 22, 10

parallel_coordinates(sample_muts_stats, 'simulated', axvlines=False)


Out[78]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f9f900c0908>